Gaussian Processes


In [511]:
import warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from scipy.optimize import minimize
from scipy.spatial.distance import pdist, squareform

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [852]:
# Defining Data
n = 10
a = 3
b = .5
sigma = 5
X = np.random.uniform(0, 10, n)
y_obs = a*X + b + np.random.normal(0, sigma, n)
data = pd.DataFrame(np.array([X, y_obs]).T, columns=['x', 'y'])
data.plot(x='x', y='y', kind='scatter', s=50);


Drawing samples from the prior over functions
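
A zero-mean GP prior is specified entirely by its covariance function. With the squared exponential kernel used below, a draw of the function values over a grid of test inputs $X_*$ is just a draw from a multivariate normal:

$$
C(x, x') = b\,\exp\!\left(-\frac{(x - x')^2}{s^2}\right),
\qquad
\mathbf{f} \sim \mathcal{N}\!\big(\mathbf{0},\; C(X_*, X_*)\big)
$$

(This parameterization follows the code below: $s$ is a length scale and $b$ a signal variance, with no factor of $\tfrac{1}{2}$ in the exponent.)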


In [853]:
# Squared exponential covariance function:
#   C(x, x') = b * exp(-(x - x')^2 / s^2)
# s is the length scale, b the signal variance.
def C(Xs, s, b):
    # Pairwise Euclidean distances between the input points
    diff = squareform(pdist(Xs.reshape(len(Xs), 1), 'euclidean'))
    # Round to 16 decimals to suppress tiny floating-point asymmetries
    return np.around(b*np.exp(-np.power(diff/s, 2)), 16)

In [854]:
s = 2
b = 1

N = 25
ys = []
x_ = np.linspace(0, 10, N)

for f in range(3):
    y = np.random.multivariate_normal(np.zeros(len(x_)), C(x_,s,b))
    ys.append(y)
    
for y in ys:
    plt.plot(x_, y)


:: NOTE ::

Constructing a squared exponential covariance kernel over a large number of closely spaced points produces an ill-conditioned covariance matrix: nearby inputs yield nearly identical rows, so the smallest eigenvalues collapse toward zero and the condition number blows up.


In [855]:
s = 2
b = 1
C_eigvals = np.linalg.eigvals(C(x_, s, b))
plt.plot(range(len(C_eigvals)), C_eigvals, 'go');
plt.xlabel("eigenvalue index $i$")
plt.ylabel(r"$\lambda_i(C)$")
plt.yscale('log')
print("Condition Number of C(x,x'): %.2f" % (max(C_eigvals)/min(C_eigvals)))


Condition Number of C(x,x'): 37083057649982592.00
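
A common remedy is to add a small "jitter" term to the diagonal before factorizing or sampling. A minimal sketch (the jitter value here is illustrative, not something used elsewhere in this notebook):

In [ ]:
# Diagonal jitter improves the conditioning of C(x, x')
jitter = 1e-6
C_jittered = C(x_, s, b) + jitter*np.eye(len(x_))
eigvals_j = np.linalg.eigvals(C_jittered)
print("Condition Number with jitter: %.2f" % (max(eigvals_j)/min(eigvals_j)))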

Updating the prior based on observations.
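
Stacking the observed inputs $X$ and test inputs $X_*$, the GP prior gives a joint Gaussian over $(\mathbf{y}, \mathbf{f}_*)$; conditioning on the observations yields the usual Gaussian conditional, which the cell below computes block-wise:

$$
\mathbf{f}_* \mid \mathbf{y} \;\sim\; \mathcal{N}\!\Big(K_{*X} K_{XX}^{-1}\,\mathbf{y},\;\; K_{**} - K_{*X} K_{XX}^{-1} K_{X*}\Big)
$$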


In [861]:
s = .5
b = 100
x_ = np.linspace(0, 10, N)
Xhat = np.hstack([X, x_])   # observed inputs first, then test inputs
ys = []

# Joint covariance over observed and test inputs, conditioned on y_obs
K = C(Xhat, s, b)
K_inv = np.linalg.inv(K[:n, :n])
mean = K[n:, :n].dot(K_inv).dot(y_obs)
var = K[n:, n:] - K[n:, :n].dot(K_inv).dot(K[:n, n:])

for f in range(10):
    y = np.random.multivariate_normal(mean, var)
    ys.append(y)

for y in ys:
    plt.plot(x_, y)
    plt.plot(X, y_obs, 'bo')


Accounting for stochasticity in observations
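
With i.i.d. Gaussian observation noise, the covariance over the observed inputs picks up a diagonal term, $K_y = K_{XX} + \sigma^2 I$, and the kernel hyperparameters can be chosen by maximizing the log marginal likelihood (the cell below drops the constant term):

$$
\log p(\mathbf{y} \mid X) = -\tfrac{1}{2}\,\mathbf{y}^\top K_y^{-1}\mathbf{y} \;-\; \tfrac{1}{2}\log\lvert K_y\rvert \;-\; \tfrac{n}{2}\log 2\pi
$$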


In [862]:
# Estimating hyperparameters of the covariance function.
# Using a multi-start (random-restart) Nelder-Mead strategy where the
# objective is the negative log marginal likelihood -log P(y|X)
# (dropping the constant term).

# Generally this is not an optimal strategy: the gradient of this
# objective is available in closed form, so more powerful
# gradient-based optimization algorithms could be used.

def log_y_marginal(theta):
    # theta = [s, b, sigma2]: length scale, signal variance, noise variance
    sigma_eye = theta[2]*np.eye(n)
    K_y = C(X, theta[0], theta[1]) + sigma_eye
    return -.5*(y_obs.T.dot(np.linalg.inv(K_y)).dot(y_obs) +
                np.log(np.linalg.det(K_y)))

nll = lambda theta: -log_y_marginal(theta)

# Random restarts over the length scale and signal variance
seed_size = 50
seeds = [np.array([t1, t2, 1]) for t1, t2 in
         zip(np.random.uniform(.5, 1000, seed_size), np.random.uniform(.5, 1000, seed_size))]

opt_theta = np.ones(3)
for init in seeds:
    res = minimize(nll, init, method='Nelder-Mead')
    if nll(res.x) < nll(opt_theta):
        opt_theta = res.x
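
The gradient of this objective is available in closed form, $\partial_{\theta_j} \log p(\mathbf{y}\mid X) = \tfrac{1}{2}\operatorname{tr}\!\big((\boldsymbol\alpha\boldsymbol\alpha^\top - K_y^{-1})\,\partial_{\theta_j} K_y\big)$ with $\boldsymbol\alpha = K_y^{-1}\mathbf{y}$, so a gradient-based optimizer could be used instead. A minimal sketch, reusing X, y_obs, and n from above (not executed here; variable names are illustrative):

In [ ]:
def nll_and_grad(theta):
    # theta = [s, b, sigma2]: length scale, signal variance, noise variance
    s_, b_, sig2 = theta
    D = squareform(pdist(X.reshape(len(X), 1), 'euclidean'))
    K_se = b_*np.exp(-np.power(D/s_, 2))
    K_y = K_se + sig2*np.eye(n)
    K_inv = np.linalg.inv(K_y)
    alpha = K_inv.dot(y_obs)
    nll_val = .5*(y_obs.dot(alpha) + np.log(np.linalg.det(K_y)))
    # Partial derivatives of K_y with respect to each hyperparameter
    dK_ds = K_se*2*np.power(D, 2)/s_**3
    dK_db = K_se/b_
    dK_dsig2 = np.eye(n)
    A = np.outer(alpha, alpha) - K_inv
    grad = -np.array([.5*np.trace(A.dot(dK)) for dK in (dK_ds, dK_db, dK_dsig2)])
    return nll_val, grad

res_lbfgs = minimize(nll_and_grad, np.array([1., 10., 1.]), jac=True,
                     method='L-BFGS-B', bounds=[(1e-3, None)]*3)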

In [863]:
nll(opt_theta)


Out[863]:
28.481892983020352

In [864]:
opt_theta


Out[864]:
array([  -9.51219328,  403.61387988,   54.84339846])

In [865]:
s, b, sigma = opt_theta
N = 10
x_ = np.linspace(0, 10, N)
Xhat = np.hstack([X, x_])   # observed inputs first, then test inputs
ys = []
# Noise variance on the observed points; unit jitter on the test points
sigma_eye = np.eye(len(Xhat))*np.append(sigma*np.ones(n), np.ones(N))

# Condition the noisy joint covariance on the observations
K = C(Xhat, s, b) + sigma_eye
K_inv = np.linalg.inv(K[:n, :n])
mean = K[n:, :n].dot(K_inv).dot(y_obs)
var = K[n:, n:] - K[n:, :n].dot(K_inv).dot(K[:n, n:])

for f in range(10):
    y = np.random.multivariate_normal(mean, var)
    ys.append(y)

for y in ys:
    plt.plot(x_, y)
    plt.plot(X, y_obs, 'bo')
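
As a numerical aside, the repeated np.linalg.inv calls above can be replaced by a Cholesky solve, which is the standard and better-conditioned way to compute the GP posterior. A minimal sketch under the same variable names (not executed here):

In [ ]:
from scipy.linalg import cho_factor, cho_solve

# Equivalent to the inv()-based posterior above, but via a Cholesky factorization
chol = cho_factor(K[:n, :n])
mean = K[n:, :n].dot(cho_solve(chol, y_obs))
var = K[n:, n:] - K[n:, :n].dot(cho_solve(chol, K[:n, n:]))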


